Start

library(sf)
library(stars)
library(caret)
library(CAST)
library(sen2r)
library(tmap)
library(knitr)

We will first fix the random number seed, to get identical results for procedures that involve randomness. Remove this command if you want the random effect in outcomes.

set.seed(131)

Get required data

Load training polygons

trainsites <- st_read("../data/trainingsites_muenster.gpkg")
## Reading layer `trainingsites_muenster' from data source `/home/hanna/Documents/Github/HannaMeyer/IGARSS_21/data/trainingsites_muenster.gpkg' using driver `GPKG'
## Simple feature collection with 29 features and 3 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 400682 ymin: 5754417 xmax: 408327.9 ymax: 5760113
## projected CRS:  WGS 84 / UTM zone 32N

Download Sentinel-2 data

list_safe <- s2_list(
  spatial_extent = st_as_sfc(st_bbox(trainsites)),
  tile = "32ULC",
  level = "L1C",
  time_interval = as.Date(c("2020-04-26", "2020-04-28"))) # ursprüngliche szene: c("2019-04-17", "2019-04-19")

#s2_download(list_safe[1], outdir="../data/")
sen_id <- names(list_safe[1])

read Sentinel data

bands <- c("B04", "B03", "B02", "B08")#, "B05", "B06", "B07", "B8A", "B11", "B12")

s2 <- list.files(paste0("../data/",sen_id),
                 recursive=TRUE,
                 full.names = TRUE,
                 pattern=".jp2")
# match band name to file name:
m = match(paste0(bands,".jp2"), substr(s2,nchar(s2)-6,nchar(s2)))
s2 <- s2[m]

sen <- read_stars(s2, proxy = TRUE, NA_value = 0) %>%
  setNames(bands)

Create Training data

pts <- st_as_sf(st_sample(trainsites, 200, "random"))
pts <- st_intersection(pts,st_make_valid(trainsites))

trainDat <- st_extract(sen, pts) %>%
  st_intersection(pts)%>%
  data.frame()


trainDat <- data.frame(trainDat,pts)
trainDat$Label <- as.factor(pts$Label)

Train model and predict

ind <- CreateSpacetimeFolds(trainDat,spacevar="PolygonID",class="Label",k=3)
ctrl <- trainControl(method="cv",index=ind$index,indexOut = ind$indexOut,savePredictions = TRUE)

model <- train(trainDat[,attributes(sen)$names],
              trainDat$Label,
              tuneLength = length(attributes(sen)$names)-1,
              trControl = ctrl,
              method="rf",ntree=200)


prediction <- predict(sen, model)

Spatial (cross-)validation

confusionMatrix(model$pred$pred[model$pred$mtry==model$bestTune$mtry],
      model$pred$obs[model$pred$mtry==model$bestTune$mtry])
## Confusion Matrix and Statistics
## 
##                Reference
## Prediction      Fallow field Grassland Inland water Mixed forest Planted field
##   Fallow field             0         0            0            1            19
##   Grassland                0         8            0            0             2
##   Inland water             0         0           18            0             0
##   Mixed forest             1         0            0           46             1
##   Planted field           25         1            0            0             3
##   Urban                    2         0            3            0            13
##                Reference
## Prediction      Urban
##   Fallow field      0
##   Grassland         0
##   Inland water      0
##   Mixed forest      0
##   Planted field     3
##   Urban            54
## 
## Overall Statistics
##                                           
##                Accuracy : 0.645           
##                  95% CI : (0.5744, 0.7112)
##     No Information Rate : 0.285           
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5477          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: Fallow field Class: Grassland Class: Inland water
## Sensitivity                       0.0000           0.8889              0.8571
## Specificity                       0.8837           0.9895              1.0000
## Pos Pred Value                    0.0000           0.8000              1.0000
## Neg Pred Value                    0.8444           0.9947              0.9835
## Prevalence                        0.1400           0.0450              0.1050
## Detection Rate                    0.0000           0.0400              0.0900
## Detection Prevalence              0.1000           0.0500              0.0900
## Balanced Accuracy                 0.4419           0.9392              0.9286
##                      Class: Mixed forest Class: Planted field Class: Urban
## Sensitivity                       0.9787              0.07895       0.9474
## Specificity                       0.9869              0.82099       0.8741
## Pos Pred Value                    0.9583              0.09375       0.7500
## Neg Pred Value                    0.9934              0.79167       0.9766
## Prevalence                        0.2350              0.19000       0.2850
## Detection Rate                    0.2300              0.01500       0.2700
## Detection Prevalence              0.2400              0.16000       0.3600
## Balanced Accuracy                 0.9828              0.44997       0.9107

Estimate the AOA

AOA <-  merge(sen)%>% 
  split() %>%
  st_as_stars(downsample=c(10,10))%>%
  setNames(bands)%>%
  aoa(model)

Visualize RGB, Prediction, and Prediction within the AOA

png("../figures/rgb_all.png", width=15, height=15,units = "cm",res=300)
plot(st_rgb(merge(st_as_stars(sen,downsample=c(10,10)))[,,,c(1,2,3)], stretch = TRUE, probs = c(.01, .99)),main=NULL)
invisible(dev.off())

cols <- rev(c("red","lightgreen","forestgreen","blue","green","beige"))




prediction_st = st_as_stars(prediction, downsample = c(10,10))


map1 <- tm_shape(prediction_st, raster.downsample = FALSE) +
  tm_raster(palette = cols,title = "LUC")+
  tm_scale_bar(bg.color="white")+
  tm_layout(legend.position = c("left","top"),
            legend.bg.color = "white",
            legend.bg.alpha = 0.6,
            legend.text.size = 1.5,
            legend.title.size = 1.5)


prediction_st[AOA[2] == 0] = NA

map1_AOA <- tm_shape(prediction_st, raster.downsample = FALSE) +
  tm_raster(palette = cols,title = "LUC")+
  tm_scale_bar(bg.color="white")+
  tm_layout(legend.position = c("left","top"),
            legend.bg.color = "white",
            legend.bg.alpha = 0.6,
            bg.color="black",
            legend.text.size = 1.5,
            legend.title.size = 1.5)+
  tm_add_legend(type = "fill",
                col="black",
                labels = "Outside AOA")



tmap_save(map1, "../figures/LUC_map.png")
tmap_save(map1_AOA, "../figures/LUC_map_AOA.png")

And the same with focus on the training area Münster:

sen_crop <- st_crop(sen,st_bbox(trainsites))%>%
  st_as_stars()%>%
  merge()

prediction_crop <- st_crop(prediction,st_bbox(trainsites))


AOA_ms <- aoa(sen_crop, model)
prediction_st_crop = st_as_stars(prediction_crop)


png("../figures/rgb_ms.png", width=15, height=11,units = "cm",res=300)
plot(st_rgb(sen_crop[,,,c(1,2,3)], stretch = TRUE, probs = c(.01, .99)),main=NULL,reset = FALSE)
plot((st_geometry(trainsites)),add=TRUE,border="red",lwd=3)
invisible(dev.off())


map1_ms <-tm_shape(prediction_st_crop, raster.downsample = FALSE) +
  tm_raster(palette = cols,title = "LUC")+
  tm_scale_bar(bg.color="white")+
  tm_layout(legend.position = c("left","top"),
            legend.bg.color = "white",
            legend.bg.alpha = 0.6,
            legend.text.size = 1.5,
            legend.title.size = 1.5)


prediction_st_crop[AOA_ms[2] == 0] = NA

map1_AOA_ms <-tm_shape(prediction_st_crop, raster.downsample = FALSE) +
  tm_raster(palette = cols,title = "LUC")+
  tm_scale_bar(bg.color="white")+
  tm_layout(legend.position = c("left","top"),
            legend.bg.color = "white",
            legend.bg.alpha = 0.6,
            bg.color="black",
            legend.text.size = 1.5,
            legend.title.size = 1.5)+
  tm_add_legend(type = "fill",
                col="black",
                labels = "Outside AOA")


tmap_save(map1_ms, "../figures/LUC_map_ms.png")
tmap_save(map1_AOA_ms, "../figures/LUC_map_ms_AOA.png")

Write results

Run this code only if you want to write the results. It takes a while!

#write_stars(AOA,"../data/AOA.tif",layer=2)

#write_stars(prediction,"../data/prediction.tif",
#            chunk_size=c(dim(prediction)[1], 
#                         floor(2.5e+06/dim(prediction)[1])))